Manuscript_gap_analysis

Author

Théophile L. Mouton

Published

November 5, 2025

Load the libraries

Code
# Load necessary libraries
library(raster)
library(here)
library(sp)
library(dplyr)
library(tidyr)
library(ggplot2)
library(dplyr)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(smoothr)
library(terra)  # For raster operations
library(raster)
library(parallel)
library(pbapply)
library(knitr)
library(kableExtra)
library(patchwork)
library(gridExtra)
library(tidyverse)
library(ggridges)
library(betareg)
library(margins)
library(ggpubr)
library(knitr)

Load the data

Code
# Species presence grid 
load(here::here("Data", "GAP analyses", "puvsp_marine.Rdata"))

# No-take MPA raster layer
mpa_NT <- raster::raster(here::here("Data", "GAP analyses", "mpa_NT_binary.tif"))

# High seas shapefile 
# High seas shapefile 
High_seas = st_read(here::here("Data", "World_High_Seas_v2_20241010", "High_Seas_v2.shp"), quiet = TRUE)

# Ecoregion shapefile
meow_ecos <- st_read(here("Data", "Shapefiles", "meow_ecos", "meow_ecos.shp"), quiet = TRUE)

# EEZ shapefile 
eez <- st_read(here("Data", "World_EEZ_v12_20231025", "eez_v12.shp"), quiet = TRUE)

# Bathymetry raster layer
Bathy <- raster(here::here("Data", "bathymetry-0.1deg-adjusted.tif"))

# IUCN statuses
IUCN <- readRDS(here::here("Data", "GAP analyses", "sharks_iucn_final.RDS"))
IUCN$species <- gsub("_", " ", IUCN$Species)

GAP analyses

GAP analyses of shark and ray range overlaps with Marine Protected Areas

Code
# Convert species dataframe to spatial points
species_points <- SpatialPointsDataFrame(coords = puvsp_marine[, c("lon", "lat")], 
                                         data = puvsp_marine, 
                                         proj4string = CRS(projection(mpa_NT)))

# Extract MPA values for each species point
mpa_NT_values <- raster::extract(mpa_NT, species_points)

# Add MPA values to the species dataframe
puvsp_marine$mpa_NT_present <- mpa_NT_values

# Function to calculate percentage of range in MPA
calculate_mpa_percentage <- function(species_column, mpa_column) {
  species_presence <- species_column == 1  # Assuming 1 indicates species presence
  total_range <- sum(species_presence, na.rm = TRUE)
  range_in_mpa <- sum(species_presence & mpa_column == 1, na.rm = TRUE)
  
  if (total_range == 0) {
    return(NA)  # Return NA if the species is not present anywhere
  }
  
  percentage <- (range_in_mpa / total_range) * 100
  return(percentage)
}

# Apply function to all species columns for both MPA types
species_columns <- 4:(ncol(puvsp_marine) - 2)  # Assuming species columns start at 4

mpa_NT_percentages <- sapply(puvsp_marine[, species_columns], 
                             function(x) calculate_mpa_percentage(x, puvsp_marine$mpa_NT_present))

# Create a dataframe with results
results <- data.frame(
  species = names(mpa_NT_percentages),
  percentage_in_NT_MPA = mpa_NT_percentages
)

# Remove any NA results
results <- results[!is.na(results$percentage_in_NT_MPA), ]

# Sort results by percentage in no-take MPAs (descending)
results <- results[order(-results$percentage_in_NT_MPA), ]

# Display top 10 species
#print(head(results, 10))

# Summary statistics
summary_stats <- data.frame(
  Statistic = c("Min", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max", "SD"),
  Value = c(
    summary(results$percentage_in_NT_MPA)[1],
    summary(results$percentage_in_NT_MPA)[2],
    summary(results$percentage_in_NT_MPA)[3],
    summary(results$percentage_in_NT_MPA)[4],
    summary(results$percentage_in_NT_MPA)[5],
    summary(results$percentage_in_NT_MPA)[6],
    sd(results$percentage_in_NT_MPA)
  )
)

kable(summary_stats, caption = "Summary Statistics for No-Take MPAs", digits = 2)
Summary Statistics for No-Take MPAs
Statistic Value
Min. Min 0.00
1st Qu. 1st Qu. 0.00
Median Median 0.78
Mean Mean 2.58
3rd Qu. 3rd Qu. 2.83
Max. Max 50.00
SD 5.40
Code
# Save results to CSV
write.csv(results, here::here("Data","species_mpa_coverage_NT.csv"), row.names = FALSE)

Map of mean % range covered

Code
# Convert results to a named vector for easier lookup
species_NT_percentages <- setNames(results$percentage_in_NT_MPA, results$species)

# Create a new dataframe to store results
grid_data <- puvsp_marine %>%
  dplyr::select(lon, lat) %>%
  mutate(
    species_count = 0,
    sum_percentages = 0
  )

# For each species, add its coverage percentage to cells where it's present
for (sp_name in names(species_NT_percentages)) {
  # Find the column index in the original data
  col_idx <- which(names(puvsp_marine) == sp_name)
  
  if (length(col_idx) > 0) {
    # Find cells where this species is present
    present_cells <- which(puvsp_marine[[col_idx]] == 1)
    
    if (length(present_cells) > 0) {
      # Add to species count
      grid_data$species_count[present_cells] <- grid_data$species_count[present_cells] + 1
      
      # Add the species' NT percentage to the sum
      grid_data$sum_percentages[present_cells] <- 
        grid_data$sum_percentages[present_cells] + species_NT_percentages[sp_name]
    }
  }
}

# Calculate mean percentage for each grid cell
grid_data <- grid_data %>%
  mutate(mean_NT_coverage = ifelse(species_count > 0, sum_percentages / species_count, NA)) %>%
  filter(!is.na(mean_NT_coverage))

# Exclude high seas grid cells ---- 

# Create high seas template raster
template_raster <- rast(ext(High_seas), resolution = 0.5)

# Rasterize high seas polygon
highseas_raster <- rasterize(vect(High_seas), template_raster, field = 1, background = 0)

# Convert grid_data to terra SpatVector for extraction
grid_points <- vect(as.data.frame(grid_data), 
                   geom = c("lon", "lat"), 
                   crs = crs(highseas_raster))

# Extract values from highseas raster
highseas_values <- terra::extract(highseas_raster, grid_points)

# Add highseas classification back to the dataframe
grid_data$is_highseas <- highseas_values[, 2]
grid_data$is_highseas[is.na(grid_data$is_highseas)] <- 0  # Set NA to 0 (continental)

# Filter to keep only continental waters (is_highseas = 0)
grid_data_continental <- grid_data %>%
  filter(is_highseas == 0)

# Define the projection
mcbryde_thomas_2 <- "+proj=mbt_s"

# Convert grid data to sf object and project (continental waters only)
grid_data_sf <- grid_data_continental %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
  st_transform(crs = mcbryde_thomas_2)

# Get and project land
land_projected <- ne_countries(scale = "medium", returnclass = "sf") %>%
  st_transform(crs = mcbryde_thomas_2)

# Create the globe border
globe_bbox <- rbind(c(-180, -90), c(-180, 90), 
                    c(180, 90), c(180, -90), c(-180, -90))

globe_border <- st_polygon(list(globe_bbox)) %>%
  st_sfc(crs = 4326) %>%
  st_sf(data.frame(rgn = 'globe', geom = .)) %>%
  smoothr::densify(max_distance = 0.5) %>%
  st_transform(crs = mcbryde_thomas_2)

# Create base theme
my_theme <- theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "vertical",
    legend.margin = margin(t = 20, r = 0, b = 0, l = 0),
    legend.title = element_text(margin = margin(b = 10)),
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    panel.grid = element_blank(),
    axis.text = element_blank(),      # Add this line
    axis.ticks = element_blank()      # Add this line
  )

# Calculate the data range for better color scaling
max_value <- max(grid_data_continental$mean_NT_coverage, na.rm = TRUE)
color_breaks <- seq(0, ceiling(max_value / 10) * 10, by = 2)

# Map with built-in transformation
nt_map_projected <- ggplot() +
  geom_sf(data = grid_data_sf, aes(color = mean_NT_coverage), size = 0.5, alpha = 0.7) +
  geom_sf(data = land_projected, fill = "lightgrey", color = "lightgrey", size = 0.1) +
  geom_sf(data = globe_border, fill = NA, color = "black", size = 0.5) +
  scale_color_viridis_c(
    option = "viridis",
    name = "Mean % range in no-take MPAs",
    trans = "sqrt",  # Use "log10" for log scale
    breaks = color_breaks,
    guide = guide_colorbar(barwidth = 20, barheight = 0.5, 
                         title.position = "top", title.hjust = 0.5)
  ) +
  coord_sf() +
  labs(
   # title = "Mean Percentage of Species Range in No-Take MPAs",
  #  subtitle = "Continental Waters Only (Square Root Scale)",
    x = NULL, y = NULL
  ) +
#  annotate("text", x = -Inf, y = Inf, label = "(A)", 
#           hjust = -1, vjust = 2, size = 6, fontface = "bold") +
  my_theme

# Print the projected map
print(nt_map_projected)

Code
# Save the projected map (continental waters only)
ggsave(here::here("Outputs","NT_MPA_coverage_continental.png"), nt_map_projected, width = 10, height = 7, dpi = 300)

# Compute statistics
continental_stats <- data.frame(
  Statistic = c("Mean", "Max", "Min", "Median", "Standard Deviation"),
  Value = c(
    mean(grid_data_continental$mean_NT_coverage, na.rm = TRUE),
    max(grid_data_continental$mean_NT_coverage, na.rm = TRUE),
    min(grid_data_continental$mean_NT_coverage, na.rm = TRUE),
    median(grid_data_continental$mean_NT_coverage, na.rm = TRUE),
    sd(grid_data_continental$mean_NT_coverage, na.rm = TRUE)
  )
)

# Display as a formatted table
kable(continental_stats, caption = "Continental Waters Statistics", digits = 2)
Continental Waters Statistics
Statistic Value
Mean 1.35
Max 13.45
Min 0.00
Median 1.27
Standard Deviation 0.68

Breakdown by realm, ecoregion and EEZ

Code
# Convert grid_data_continental to sf for spatial operations
grid_sf <- grid_data_continental %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4326)

#=====================
# Calculate by Realm
#=====================
# Extract realms from meow_ecos
realms <- meow_ecos %>%
  group_by(REALM) %>%
  summarize(geometry = st_union(geometry)) %>%
  ungroup()

# Spatial join with realms
grid_realms <- st_join(grid_sf, realms)

# Calculate mean coverage by realm
realm_stats <- grid_realms %>%
  st_drop_geometry() %>%
  group_by(REALM) %>%
  summarize(
    mean_NT_coverage = mean(mean_NT_coverage, na.rm = TRUE),
    median_NT_coverage = median(mean_NT_coverage, na.rm = TRUE),
    sd_NT_coverage = sd(mean_NT_coverage, na.rm = TRUE),
    n_cells = n()
  ) %>%
  arrange(desc(mean_NT_coverage))

#======================
# Calculate by Ecoregion
#======================
# Spatial join with ecoregions
grid_ecoregions <- st_join(grid_sf, meow_ecos)

# Calculate mean coverage by ecoregion
ecoregion_stats <- grid_ecoregions %>%
  st_drop_geometry() %>%
  group_by(ECOREGION) %>%
  summarize(
    PROVINCE = first(PROVINCE),
    REALM = first(REALM),
    mean_NT_coverage = mean(mean_NT_coverage, na.rm = TRUE),
    median_NT_coverage = median(mean_NT_coverage, na.rm = TRUE),
    sd_NT_coverage = sd(mean_NT_coverage, na.rm = TRUE),
    n_cells = n()
  ) %>%
  arrange(desc(mean_NT_coverage))

#======================
# Calculate by EEZ
#======================
# Fix potential invalid geometries in EEZ data
eez_valid <- st_make_valid(eez)

# Spatial join with EEZ
grid_eez <- st_join(grid_sf, eez_valid %>% dplyr::select(TERRITORY1, SOVEREIGN1, ISO_TER1, MRGID_TER1))

# Calculate mean coverage by EEZ
eez_stats <- grid_eez %>%
  st_drop_geometry() %>%
  group_by(TERRITORY1, SOVEREIGN1) %>%
  summarize(
    ISO_TER1 = first(ISO_TER1),
    MRGID_TER1 = first(MRGID_TER1),
    mean_NT_coverage = mean(mean_NT_coverage, na.rm = TRUE),
    median_NT_coverage = median(mean_NT_coverage, na.rm = TRUE),
    sd_NT_coverage = sd(mean_NT_coverage, na.rm = TRUE),
    n_cells = n()
  ) %>%
  arrange(desc(mean_NT_coverage))

# Print summary tables
cat("Top 10 Realms by Mean No-Take MPA Coverage:\n")
Top 10 Realms by Mean No-Take MPA Coverage:
Code
print(head(realm_stats, 10))
# A tibble: 10 × 5
   REALM              mean_NT_coverage median_NT_coverage sd_NT_coverage n_cells
   <chr>                         <dbl>              <dbl>          <dbl>   <int>
 1 Temperate Austral…             1.79               1.79             NA    2109
 2 Central Indo-Paci…             1.78               1.78             NA    9292
 3 Southern Ocean                 1.73               1.73             NA    1504
 4 Tropical Atlantic              1.60               1.60             NA    4510
 5 Western Indo-Paci…             1.51               1.51             NA    4680
 6 Tropical Eastern …             1.49               1.49             NA    1357
 7 Eastern Indo-Paci…             1.44               1.44             NA    6303
 8 <NA>                           1.16               1.16             NA    5817
 9 Temperate South A…             1.15               1.15             NA    2432
10 Temperate Souther…             1.15               1.15             NA     694
Code
cat("\nTop 10 Ecoregions by Mean No-Take MPA Coverage:\n")

Top 10 Ecoregions by Mean No-Take MPA Coverage:
Code
print(head(ecoregion_stats, 10))
# A tibble: 10 × 7
   ECOREGION   PROVINCE REALM mean_NT_coverage median_NT_coverage sd_NT_coverage
   <chr>       <chr>    <chr>            <dbl>              <dbl>          <dbl>
 1 Heard and … Subanta… Sout…             5.96               5.96             NA
 2 Central an… Northea… Cent…             4.97               4.97             NA
 3 Ross Sea    Contine… Sout…             4.90               4.90             NA
 4 Coral Sea   Tropica… Cent…             4.52               4.52             NA
 5 Kerguelen … Subanta… Sout…             3.78               3.78             NA
 6 Cortezian   Warm Te… Temp…             3.39               3.39             NA
 7 Torres Str… Northea… Cent…             3.17               3.17             NA
 8 Arnhem Coa… Sahul S… Cent…             3.16               3.16             NA
 9 Tweed-More… East Ce… Temp…             3.10               3.10             NA
10 Gulf of Pa… Sahul S… Cent…             2.75               2.75             NA
# ℹ 1 more variable: n_cells <int>
Code
cat("\nTop 10 EEZs by Mean No-Take MPA Coverage:\n")

Top 10 EEZs by Mean No-Take MPA Coverage:
Code
print(head(eez_stats, 10))
# A tibble: 10 × 8
# Groups:   TERRITORY1 [10]
   TERRITORY1 SOVEREIGN1 ISO_TER1 MRGID_TER1 mean_NT_coverage median_NT_coverage
   <chr>      <chr>      <chr>         <dbl>            <dbl>              <dbl>
 1 Heard and… Australia  HMD            8631             4.77               4.77
 2 Kerguélen  France     ATF            8630             3.32               3.32
 3 Sint-Maar… Netherlan… SXM           21802             2.83               2.83
 4 Quitasueñ… Colombia   <NA>           5765             2.83               2.83
 5 Saint-Bar… France     BLM           19099             2.80               2.80
 6 Saba       Netherlan… BES           19095             2.74               2.74
 7 Collectiv… France     MAF            8688             2.71               2.71
 8 Belize     Belize     BLZ            8681             2.60               2.60
 9 Australia  Australia  AUS            2147             2.55               2.55
10 Saint Kit… Saint Kit… KNA            8644             2.51               2.51
# ℹ 2 more variables: sd_NT_coverage <dbl>, n_cells <int>
Code
#======================
# Create visualizations
#======================

# 1. Choropleth map of ecoregions
# Join stats back to spatial data
meow_ecos_stats <- meow_ecos %>%
  left_join(ecoregion_stats, by = "ECOREGION")

# Plot ecoregions map
eco_map <- ggplot() +
  geom_sf(data = meow_ecos_stats, aes(fill = mean_NT_coverage), color = NA) +
  geom_sf(data = ne_countries(scale = "medium", returnclass = "sf"), 
          fill = "lightgrey", color = "darkgrey", size = 0.1) +
  scale_fill_viridis_c(
    name = "Mean % range in no-take MPAs",
    na.value = "grey90",
    trans = "sqrt",
    guide = guide_colorbar(barwidth = 20, barheight = 0.5,
                           title.position = "top", title.hjust = 0.5)
  ) +
  theme_minimal() +
  labs(
    #title = "Mean No-Take MPA Coverage by Marine Ecoregion",
    x = NULL, y = NULL
  ) +
  theme(
    legend.position = "bottom",
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Save ecoregion map
ggsave(here("Outputs", "ecoregion_NT_coverage_map.png"), eco_map, width = 12, height = 8, dpi = 300)

# 2. Choropleth map of EEZs
# Join stats back to spatial data
eez_stats_map <- eez_valid %>%
  left_join(eez_stats, by = c("TERRITORY1", "SOVEREIGN1"))

# Plot EEZ map
eez_map <- ggplot() +
  geom_sf(data = eez_stats_map, aes(fill = mean_NT_coverage), color = NA) +
  geom_sf(data = ne_countries(scale = "medium", returnclass = "sf"), 
          fill = "lightgrey", color = "darkgrey", size = 0.1) +
  scale_fill_viridis_c(
    name = "Mean % range in No-Take MPAs",
    na.value = "grey90",
    trans = "sqrt",
    guide = guide_colorbar(barwidth = 20, barheight = 0.5,
                           title.position = "top", title.hjust = 0.5)
  ) +
  theme_minimal() +
  labs(
    title = "Mean No-Take MPA Coverage by Exclusive Economic Zone (EEZ)",
    x = NULL, y = NULL
  ) +
  theme(
    legend.position = "bottom",
    panel.grid = element_blank()
  )

# Save EEZ map
ggsave(here("Outputs", "eez_NT_coverage_map.png"), eez_map, width = 12, height = 8, dpi = 300)

# 3. Bar plots for top regions
# Top 20 Ecoregions
top_eco_plot <- ggplot(head(ecoregion_stats, 20), aes(x = reorder(ECOREGION, mean_NT_coverage), y = mean_NT_coverage)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_errorbar(aes(ymin = mean_NT_coverage - sd_NT_coverage, 
                   ymax = mean_NT_coverage + sd_NT_coverage), 
               width = 0.2) +
  coord_flip() +
  labs(
   # title = "Top 20 Marine Ecoregions by Mean No-Take MPA Coverage",
    x = NULL,
    y = "Mean % range in no-take MPAs"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 8)
  )

# Save top ecoregions plot
ggsave(here("Outputs", "top_ecoregions_NT_coverage.png"), top_eco_plot, width = 10, height = 8, dpi = 300)

# Bottom 20 Ecoregions
bottom_eco_plot <- ggplot(tail(ecoregion_stats, 20), aes(x = reorder(ECOREGION, mean_NT_coverage), y = mean_NT_coverage)) +
  geom_bar(stat = "identity", fill = "coral") +
  geom_errorbar(aes(ymin = mean_NT_coverage - sd_NT_coverage, 
                   ymax = mean_NT_coverage + sd_NT_coverage), 
               width = 0.2) +
  coord_flip() +
  labs(
 #   title = "Bottom 20 Marine Ecoregions by Mean No-Take MPA Coverage",
    x = NULL,
    y = "Mean % range in no-take MPAs"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 8)
  )

# Save bottom ecoregions plot
ggsave(here("Outputs", "bottom_ecoregions_NT_coverage.png"), bottom_eco_plot, width = 10, height = 8, dpi = 300)

# Top 20 EEZs
top_eez_plot <- ggplot(head(eez_stats, 20), aes(x = reorder(TERRITORY1, mean_NT_coverage), y = mean_NT_coverage)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_errorbar(aes(ymin = mean_NT_coverage - sd_NT_coverage, 
                   ymax = mean_NT_coverage + sd_NT_coverage), 
               width = 0.2) +
  coord_flip() +
  labs(
    title = "Top 20 EEZs by Mean No-Take MPA Coverage",
    x = NULL,
    y = "Mean % Range in No-Take MPAs"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 8)
  )

# Save top EEZs plot
ggsave(here("Outputs", "top_eezs_NT_coverage.png"), top_eez_plot, width = 10, height = 8, dpi = 300)

Null model of MPA placement within EEZs

Null model of MPA placement and overlaps with the range of sharks and rays

Code
# Create marine mask from bathymetry (areas < 0)
marine_mask <- Bathy < 0

# Resample marine mask to match MPA raster resolution
marine_mask_resampled <- raster::resample(marine_mask, mpa_NT, method = "ngb")

# Ensure marine_mask_resampled is binary (0 or 1)
marine_mask_resampled <- raster::reclassify(marine_mask_resampled, c(-Inf, 0.5, 0, 0.5, Inf, 1))

# Function to create a raster mask of EEZs with unique values for each country
create_eez_mask <- function(template_raster) {
  # Ensure EEZ has a unique identifier for each country
  eez$country_id <- as.numeric(as.factor(eez$SOVEREIGN1))
  
  # Rasterize the EEZ shapefile using the country_id
  eez_raster <- rasterize(eez, template_raster, field = "country_id")
  
  return(eez_raster)
}

# Updated create_random_mpa function to randomize within each country's EEZ
create_random_mpa <- function(template_raster, marine_mask, eez_mask) {
  # Ensure template_raster is binary
  template_raster <- raster::reclassify(template_raster, c(-Inf, 0.5, 0, 0.5, Inf, 1))
  
  # Combine marine_mask and eez_mask
  combined_mask <- marine_mask * (eez_mask > 0)
  
  # Get unique country IDs
  country_ids <- unique(eez_mask[!is.na(eez_mask)])
  country_ids <- country_ids[country_ids > 0]
  
  # Initialize random raster
  random_raster <- template_raster
  random_raster[] <- NA
  
  for (country_id in country_ids) {
    # Create mask for current country
    country_mask <- eez_mask == country_id
    
    # Count original marine MPA cells within current country's EEZ
    original_mpa_cells <- sum(template_raster[] == 1 & country_mask[] & !is.na(combined_mask[]), na.rm = TRUE)
    
    # Get all valid cells for current country (marine areas within EEZ)
    valid_cells <- which(country_mask[] & !is.na(combined_mask[]))
    total_valid_cells <- length(valid_cells)
    
    if (original_mpa_cells > 0 && total_valid_cells > 0) {
      if (original_mpa_cells > total_valid_cells) {
        warning(paste("More MPA cells than valid marine cells for country ID", country_id, ". Adjusting MPA cell count."))
        original_mpa_cells <- total_valid_cells
      }
      
      # Randomly select valid cells to be MPAs for current country
      mpa_cells <- sample(valid_cells, size = min(original_mpa_cells, total_valid_cells), replace = FALSE)
      
      # Update random raster
      random_raster[valid_cells] <- 0
      random_raster[mpa_cells] <- 1
    }
  }
  
  return(random_raster)
}

# Calculate MPA percentage function
calculate_mpa_percentage <- function(species_values, mpa_values) {
  total_cells <- sum(!is.na(species_values))
  mpa_cells <- sum(mpa_values == 1 & !is.na(species_values), na.rm = TRUE)
  percentage <- (mpa_cells / total_cells) * 100
  return(percentage)
}

# Main analysis function
run_random_mpa_analysis <- function(species_data, mpa_raster, marine_mask, eez_mask, n_iterations = 100) {
  # Convert species dataframe to spatial points
  species_points <- sp::SpatialPointsDataFrame(coords = species_data[, c("lon", "lat")], 
                                               data = species_data, 
                                               proj4string = sp::CRS(raster::projection(mpa_raster)))
  
  # Prepare results storage
  species_columns <- 4:(ncol(species_data) - 1)  # Adjust if needed
  all_results <- matrix(nrow = length(species_columns), ncol = n_iterations)
  rownames(all_results) <- names(species_data)[species_columns]
  
  # Use pblapply for parallel processing with a progress bar
  all_results <- pblapply(1:n_iterations, function(i) {
    set.seed(i)  # Set seed for reproducibility
    random_mpa <- create_random_mpa(mpa_raster, marine_mask, eez_mask)
    random_mpa_values <- raster::extract(random_mpa, species_points)
    sapply(species_data[, species_columns], 
           function(x) calculate_mpa_percentage(x, random_mpa_values))
  }, cl = detectCores() - 1)  # Use one less than available cores
  
  all_results <- do.call(cbind, all_results)
  
  # Calculate mean and standard deviation for each species
  mean_percentages <- rowMeans(all_results, na.rm = TRUE)
  sd_percentages <- apply(all_results, 1, sd, na.rm = TRUE)
  
  results <- data.frame(
    species = rownames(all_results),
    mean_percentage = mean_percentages,
    sd_percentage = sd_percentages
  )
  
  return(results)
}

# Function to process results
process_results <- function(results, mpa_type) {
  # Sort results by mean_percentage in descending order
  results <- results[order(-results$mean_percentage), ]
  
  # Calculate summary statistics
  summary_stats <- summary(results$mean_percentage)
  
  # Write results to CSV
  write.csv(results, here::here("Outputs",paste0("species_random_", mpa_type, "_coverage.csv")), row.names = FALSE)
  
  # Return a list containing the processed data
  return(list(
    top_10 = head(results, 10),
    summary_stats = summary_stats,
    all_results = results
  ))
}

# Create the EEZ mask with unique country IDs
eez_mask <- create_eez_mask(mpa_NT)

# Run the analysis with the country-specific EEZ constraint
tryCatch({
  random_results_NT <- run_random_mpa_analysis(puvsp_marine, mpa_NT, marine_mask_resampled, eez_mask, n_iterations = 100)
  print("Analysis for No-Take MPAs completed successfully.")
}, error = function(e) {
  print(paste("Error in No-Take MPAs analysis:", e$message))
})
[1] "Analysis for No-Take MPAs completed successfully."
Code
# Process results for both MPA types
processed_results <- list()

if (exists("random_results_NT")) {
  processed_results[["No-take MPAs"]] <- process_results(random_results_NT, "No-take MPAs")
}

Compare results

Compare results between the MPA network and the Null model of MPA placement

Code
# Load actual MPA coverage data
actual_coverage <- read.csv(here::here("Data", "species_mpa_coverage_NT.csv"))
colnames(actual_coverage)[2] <- c("NT")

# Load random MPA coverage results
random_NT <- read.csv(here::here("Outputs", "species_random_No-take MPAs_coverage.csv"))

# Reshape actual coverage data to long format (NT only)
actual_long <- actual_coverage %>%
  select(species, NT) %>%
  rename(actual_percentage = NT)

# Function to merge and compare actual vs random data
compare_coverage <- function(actual_data, random_data) {
  comparison <- actual_data %>%
    left_join(random_data, by = "species") %>%
    mutate(difference = actual_percentage - mean_percentage,
           z_score = (actual_percentage - mean_percentage) / sd_percentage)
  
  return(comparison)
}

# Create comparison dataframe for NT MPAs
comparison_NT <- compare_coverage(actual_long, random_NT)

# Function to summarize and print results
summarize_results <- function(comparison_data) {
  over_represented <- mean(comparison_data$difference > 0, na.rm = TRUE) * 100
  under_represented <- mean(comparison_data$difference < 0, na.rm = TRUE) * 100
  equally_represented <- 100 - over_represented - under_represented
  
  summary_df <- data.frame(
    Representation = c("Over-represented", "Under-represented", "Equally represented"),
    Percentage = round(c(over_represented, under_represented, equally_represented), 2)
  )
  
  kable(summary_df, format = "html", col.names = c("Representation", "Percentage (%)"),
        caption = "Summary for No-Take MPAs") %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                  full_width = FALSE) %>%
    column_spec(2, width = "100px") %>%
    row_spec(0, bold = TRUE) %>%
    add_header_above(c(" " = 1, "Species" = 1)) %>%
    footnote(general = "Percentages may not sum to 100% due to rounding.")
}

# Summarize results
summarize_results(comparison_NT)
Summary for No-Take MPAs
Species
Representation Percentage (%)
Over-represented 23.93
Under-represented 63.87
Equally represented 12.20
Note:
Percentages may not sum to 100% due to rounding.
Code
# Identify significantly over/under-represented species
significant_species <- function(comparison_data, z_threshold = 1.96) {
  over_represented <- comparison_data %>%
    filter(z_score > z_threshold) %>%
    arrange(desc(z_score))
  
  under_represented <- comparison_data %>%
    filter(z_score < -z_threshold) %>%
    arrange(z_score)
  
  top_10_over <- over_represented %>%
    dplyr::select(species, actual_percentage, mean_percentage, difference, z_score) %>%
    head(10)
  
  top_10_under <- under_represented %>%
    dplyr::select(species, actual_percentage, mean_percentage, difference, z_score) %>%
    head(10)
  
  combined_table <- bind_rows(
    top_10_over,
    top_10_under
  ) %>%
    mutate(across(where(is.numeric), ~round(., 2)))
  
  kable_output <- kable(combined_table, format = "html",
                        col.names = c("Species", "Actual %", "Random %", "Difference", "SES"),
                        caption = "Top 10 significantly over/under-represented species in No-Take MPAs") %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                  full_width = FALSE) %>%
    column_spec(1, width = "200px") %>%
    column_spec(2:5, width = "100px") %>%
    row_spec(0, bold = TRUE) %>%
    pack_rows("Over-represented", 1, 10, label_row_css = "background-color: #e6f3ff; color: #000;") %>%
    pack_rows("Under-represented", 11, 20, label_row_css = "background-color: #fff0e6; color: #000;")
  
  footnote_text <- paste(
    "Total significantly over-represented species:", nrow(over_represented), "\n",
    "Total significantly under-represented species:", nrow(under_represented)
  )
  
  kable_output <- kable_output %>%
    footnote(general = footnote_text, general_title = "Note:")
  
  # Save as PNG
  save_kable(kable_output,
             file = here::here("Outputs", "significant_species_NT.png"),
             zoom = 2)
  
  return(kable_output)
}

# Identify significant species and save as PNG
significant_species(comparison_NT)
Top 10 significantly over/under-represented species in No-Take MPAs
Species Actual % Random % Difference SES
Over-represented
Pseudoginglymostoma brevicaudatum 0.98 0.00 0.98 Inf
Narke capensis 0.78 0.00 0.78 Inf
Trigonognathus kabeyai 44.27 2.60 41.67 32.80
Apristurus spongiceps 28.74 2.44 26.30 26.41
Parmaturus albimarginatus 5.00 0.03 4.97 19.90
Parmaturus albipenis 5.56 0.06 5.50 14.07
Etmopterus villosus 34.21 2.29 31.92 13.91
Mobula tarapacana 1.15 0.97 0.18 10.35
Hemiscyllium galei 36.36 1.36 35.00 9.97
Carcharhinus galapagensis 6.30 2.97 3.33 9.56
Under-represented
Carcharhinus obscurus 3.13 6.05 -2.92 -13.82
Hypogaleus hyugaensis 5.69 16.07 -10.38 -13.69
Sphyrna lewini 1.58 2.99 -1.41 -13.66
Carcharias taurus 2.04 6.09 -4.05 -13.49
Sphyrna mokarran 1.41 2.58 -1.17 -12.15
Carcharhinus plumbeus 1.82 3.32 -1.50 -11.26
Sphyrna zygaena 0.99 2.68 -1.69 -11.18
Etmopterus lucifer 1.92 5.25 -3.33 -11.18
Amblyraja hyperborea 1.01 2.37 -1.35 -10.75
Heterodontus portusjacksoni 4.04 21.28 -17.25 -10.72
Note:
Total significantly over-represented species: 133
Total significantly under-represented species: 365
Code
# Stacked bar chart
# Create a summary dataframe for plotting
summary_NT <- data.frame(
  Representation = c("Under-represented", "Equally represented", "Over-represented"),
  Percentage = c(
    mean(comparison_NT$difference < 0, na.rm = TRUE) * 100,
    100 - mean(comparison_NT$difference > 0, na.rm = TRUE) * 100 - mean(comparison_NT$difference < 0, na.rm = TRUE) * 100,
    mean(comparison_NT$difference > 0, na.rm = TRUE) * 100
  )
)

# Order the factor levels in reverse for proper stacking
summary_NT$Representation <- factor(summary_NT$Representation,
                                    levels = c("Over-represented", "Equally represented", "Under-represented"))

# Create the horizontal stacked barplot
stacked_bar_chart <- ggplot(summary_NT, aes(x = 1, y = Percentage, fill = Representation)) +
  geom_bar(stat = "identity", position = "stack", width = 0.5) +
  scale_fill_manual(values = c("Over-represented" = "#56B4E9",
                                "Equally represented" = "#999999",
                                "Under-represented" = "#E69F00"),
                    breaks = c("Under-represented", "Equally represented", "Over-represented")) +
  geom_text(aes(label = sprintf("%.2f%%", Percentage)),
            position = position_stack(vjust = 0.5),
            color = "black", size = 4) +
  coord_flip() +
  theme_minimal() +
  labs(
    x = NULL,
    y = "Percentage of species (%)"
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 100)) +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )

# Display the plot
print(stacked_bar_chart)

Breakdown of under-represented species

Code
# Identify under-represented species (where actual coverage is less than random)
under_represented_NT <- comparison_NT %>%
  filter(difference < 0) %>%
  arrange(difference) %>%
  dplyr::select(species, actual_percentage, mean_percentage, difference, z_score)

# Count the number of under-represented species
n_under_NT <- nrow(under_represented_NT)

# Save the list of under-represented species to CSV file
write.csv(under_represented_NT, 
          here::here("Outputs", "under_represented_species_NT_MPAs.csv"), 
          row.names = FALSE)

# Create summary data frame with additional information
under_rep_NT_summary <- under_represented_NT %>%
  mutate(protection_gap = mean_percentage - actual_percentage,
         representation_ratio = actual_percentage / mean_percentage) %>%
  arrange(desc(protection_gap))

# Save the enhanced summary data
write.csv(under_rep_NT_summary, 
          here::here("Outputs", "under_represented_species_NT_MPAs_summary.csv"), 
          row.names = FALSE)

## Compare with IUCN data 

# Define the function to prepare IUCN data
prepare_iucn_data <- function(species_data, species_col) {
  # Rename the species column in the input data to match
  species_data_renamed <- species_data %>%
    dplyr::rename(species = all_of(species_col))
  
  # Ensure we're only joining based on the species name
  species_data_slim <- species_data_renamed %>%
    dplyr::select(species)
  
  results_IUCN <- inner_join(IUCN, species_data_slim, by = "species") %>%
    mutate(IUCN_status = case_when(
      iucn_GE == 0 ~ "LC",
      iucn_GE == 1 ~ "NT",
      iucn_GE == 2 ~ "VU",
      iucn_GE == 3 ~ "EN",
      iucn_GE == 4 ~ "CR",
      TRUE ~ "Unknown"
    )) %>%
    filter(IUCN_status != "Unknown")
  
  return(results_IUCN)
}

# Get species lists for each category
all_species <- actual_coverage %>% dplyr::select(species)
under_rep_NT_species <- under_represented_NT %>% dplyr::select(species)

# Use the prepare_iucn_data function to get IUCN statuses for each group separately
all_species_iucn <- prepare_iucn_data(all_species, "species")
under_rep_NT_iucn <- prepare_iucn_data(under_rep_NT_species, "species")

# Function to summarize IUCN status distribution
summarize_iucn <- function(data, title) {
  # Count species by IUCN status
  iucn_counts <- data %>%
    group_by(IUCN_status) %>%
    summarise(
      count = n(),
      .groups = "drop"
    ) %>%
    mutate(
      percentage = count / sum(count) * 100,
      title = title
    )
  
  # Order IUCN statuses correctly
  iucn_counts$IUCN_status <- factor(iucn_counts$IUCN_status,
                                    levels = c("LC", "NT", "VU", "EN", "CR"))
  
  return(iucn_counts)
}

# Get IUCN summaries for each dataset
all_iucn_summary <- summarize_iucn(all_species_iucn, "All Species")
under_NT_iucn_summary <- summarize_iucn(under_rep_NT_iucn, "Under-represented (No-take MPAs)")

# Print summaries to check
print(all_iucn_summary)
# A tibble: 5 × 4
  IUCN_status count percentage title      
  <fct>       <int>      <dbl> <chr>      
1 CR             79       8.14 All Species
2 EN            114      11.7  All Species
3 LC            496      51.1  All Species
4 NT            109      11.2  All Species
5 VU            173      17.8  All Species
Code
print(under_NT_iucn_summary)
# A tibble: 5 × 4
  IUCN_status count percentage title                           
  <fct>       <int>      <dbl> <chr>                           
1 CR             41       6.58 Under-represented (No-take MPAs)
2 EN             71      11.4  Under-represented (No-take MPAs)
3 LC            333      53.5  Under-represented (No-take MPAs)
4 NT             68      10.9  Under-represented (No-take MPAs)
5 VU            110      17.7  Under-represented (No-take MPAs)
Code
# Combine summaries
combined_iucn_summary <- bind_rows(
  all_iucn_summary,
  under_NT_iucn_summary
)

# Create a bar plot comparing IUCN distributions
iucn_plot_NT <- ggplot(combined_iucn_summary, aes(x = IUCN_status, y = percentage, fill = title)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(aes(label = paste0(round(percentage, 2), "%")),
            position = position_dodge(width = 0.9),
            vjust = -0.5,
            size = 3) +
  scale_fill_manual(values = c("All Species" = "grey70",
                                "Under-represented (No-take MPAs)" = "darkred")) +
  labs(
    x = "IUCN Red List threat status",
    y = "Percentage of species (%)",
    fill = ""
  ) +
  theme_classic() +
  theme(
    legend.position = "bottom",
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )

# Print the plot
print(iucn_plot_NT)

Code
# Save the plot
ggsave(here::here("Outputs", "under_represented_species_iucn_comparison_NT.png"),
       iucn_plot_NT, width = 10, height = 6, dpi = 300)

# Statistical test: Chi-square test to determine if there are significant differences
# in IUCN status distribution between all species and under-represented species
# Prepare contingency tables
prepare_contingency <- function(summary_df1, summary_df2) {
  # Ensure all IUCN categories are present in both datasets
  all_categories <- c("LC", "NT", "VU", "EN", "CR")
  
  # Fill in missing categories with zero
  for (cat in all_categories) {
    if (!cat %in% summary_df1$IUCN_status) {
      summary_df1 <- bind_rows(summary_df1,
                              data.frame(IUCN_status = cat, count = 0,
                                         percentage = 0, title = unique(summary_df1$title)))
    }
    
    if (!cat %in% summary_df2$IUCN_status) {
      summary_df2 <- bind_rows(summary_df2,
                              data.frame(IUCN_status = cat, count = 0,
                                         percentage = 0, title = unique(summary_df2$title)))
    }
  }
  
  # Order and extract counts
  df1_ordered <- summary_df1 %>%
    arrange(factor(IUCN_status, levels = all_categories)) %>%
    pull(count)
  
  df2_ordered <- summary_df2 %>%
    arrange(factor(IUCN_status, levels = all_categories)) %>%
    pull(count)
  
  # Create contingency table
  return(rbind(df1_ordered, df2_ordered))
}

# Perform chi-square test
contingency_NT <- prepare_contingency(all_iucn_summary, under_NT_iucn_summary)
rownames(contingency_NT) <- c("All Species", "Under-represented (No-take MPAs)")
colnames(contingency_NT) <- c("LC", "NT", "VU", "EN", "CR")

# Print contingency table
cat("Contingency Table - No-take MPAs:\n")
Contingency Table - No-take MPAs:
Code
print(contingency_NT)
                                  LC  NT  VU  EN CR
All Species                      496 109 173 114 79
Under-represented (No-take MPAs) 333  68 110  71 41
Code
cat("\n")
Code
# Run chi-square test
chi_test_NT <- chisq.test(contingency_NT)

# Print results
cat("Chi-square Test - No-take MPAs vs. All Species:\n")
Chi-square Test - No-take MPAs vs. All Species:
Code
print(chi_test_NT)

    Pearson's Chi-squared test

data:  contingency_NT
X-squared = 1.7057, df = 4, p-value = 0.7897
Code
cat("\n")
Code
# Create a summary table with the IUCN distribution
iucn_summary_table <- combined_iucn_summary %>%
  pivot_wider(
    id_cols = IUCN_status,
    names_from = title,
    values_from = c(count, percentage)
  ) %>%
  arrange(factor(IUCN_status, levels = c("LC", "NT", "VU", "EN", "CR")))

# Save the summary tables
write.csv(iucn_summary_table,
          here::here("Outputs", "under_represented_species_iucn_summary_NT.csv"),
          row.names = FALSE)

Map results of the GAP analyses

Map mean Standardised Effect Sizes of MPA coverage

Code
# Get land polygons from rnaturalearth
land <- ne_countries(scale = "medium", returnclass = "sf")

# Create template raster
template_raster <- rast(ext(High_seas), resolution = 0.5)

# Rasterize high seas polygon
highseas_raster <- rasterize(vect(High_seas), template_raster, field = 1, background = 0)

# Plot to verify
#plot(highseas_raster,
#     main = "High Seas (1) vs Continental Waters (0)",
#     col = c("lightblue", "darkblue"))

# For No-take MPAs
difference_sp <- comparison_NT[, c(1, 3, 6)]

# Calculate cleaned SES for each species in comparison_NT
comparison_NT <- comparison_NT %>%
  mutate(SES_clean = case_when(
    sd_percentage == 0 & actual_percentage == mean_percentage ~ 0,
    sd_percentage == 0 & actual_percentage > mean_percentage ~ 3,   # Cap at +3
    sd_percentage == 0 & actual_percentage < mean_percentage ~ -3,  # Cap at -3
    TRUE ~ (actual_percentage - mean_percentage) / sd_percentage
  ))

# Reshape puvsp_marine from wide to long format
puvsp_long <- puvsp_marine %>%
  pivot_longer(cols = -c(id, lon, lat), names_to = "species", values_to = "presence") %>%
  filter(!is.na(presence) & presence == 1)  # Only keep rows where species is present

# Join SES data to puvsp_long
combined_data_clean <- puvsp_long %>%
  left_join(comparison_NT %>% select(species, SES_clean), by = c("species" = "species"))

# Calculate mean SES per cell with cleaned values
mean_ses_per_cell_clean <- combined_data_clean %>%
  group_by(id, lon, lat) %>%
  summarise(mean_SES = mean(SES_clean, na.rm = TRUE), .groups = "drop")

# Check the cleaned results
#summary(mean_ses_per_cell_clean$mean_SES)

# Convert to terra SpatVector for extraction
ses_points_NT <- vect(as.data.frame(mean_ses_per_cell_clean),
                      geom = c("lon", "lat"),
                      crs = crs(highseas_raster))

# Extract values from highseas raster
highseas_values_NT <- terra::extract(highseas_raster, ses_points_NT)

# Add highseas classification back to the dataframe
mean_ses_per_cell_clean$is_highseas <- highseas_values_NT[, 2]
mean_ses_per_cell_clean$is_highseas[is.na(mean_ses_per_cell_clean$is_highseas)] <- 0  # Set NA to 0 (continental)

# Convert to sf for mapping
mean_ses_sf_NT <- st_as_sf(mean_ses_per_cell_clean, coords = c("lon", "lat"), crs = 4326)

# Filter for continental waters only
continental_points_NT <- mean_ses_sf_NT[mean_ses_sf_NT$is_highseas == 0, ]

# Define the projection
mcbryde_thomas_2 <- "+proj=mbt_s"

# Project datasets
continental_projected_NT <- st_transform(continental_points_NT, crs = mcbryde_thomas_2)
land_projected <- ne_countries(scale = "medium", returnclass = "sf") %>%
  st_transform(crs = mcbryde_thomas_2)

# Create the globe border
globe_bbox <- rbind(c(-180, -90), c(-180, 90),
                    c(180, 90), c(180, -90), c(-180, -90))
globe_border <- st_polygon(list(globe_bbox)) %>%
  st_sfc(crs = 4326) %>%
  st_sf(data.frame(rgn = 'globe', geom = .)) %>%
  smoothr::densify(max_distance = 0.5) %>%
  st_transform(crs = mcbryde_thomas_2)

# Create base theme
my_theme <- theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "vertical",
    legend.margin = margin(t = 20, r = 0, b = 0, l = 0),
    legend.title = element_text(margin = margin(b = 10)),
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Create plot for continental waters - No-take MPAs with McBryde-Thomas projection
continental_plot_NT <- ggplot() +
  geom_sf(data = continental_projected_NT, aes(color = mean_SES), size = 0.5, alpha = 0.7) +
  geom_sf(data = land_projected, fill = "lightgrey", color = "lightgrey", size = 0.1) +
  geom_sf(data = globe_border, fill = NA, color = "black", size = 0.5) +
  scale_color_gradient2(
    low = "#b2182b", mid = "#f0f0f0", high = "#2166ac", 
    midpoint = 0,
# Note: The color scale is manually limited to [-3, 3] to improve visual contrast.
# Values outside this range are squished to the nearest limit (using oob = scales::squish),
# which prevents extreme SES values (e.g., -10 or +5) from dominating the color scale.
    limits = c(-3, 3),
    oob = scales::squish,
    name = "Mean SES",
    guide = guide_colorbar(barwidth = 20, barheight = 0.5,
                          title.position = "top", title.hjust = 0.5)
  ) +
  coord_sf() +
  labs(
    x = NULL, y = NULL
  ) +
  my_theme

# Print the plot
print(continental_plot_NT)

Code
# Save the plot
ggsave(here::here("Outputs", "SES_map_continental_NT.png"),
       continental_plot_NT,
       width = 10,
       height = 6,
       dpi = 300,
       bg = "white")

#======================
# Calculate SES by Ecoregion
#======================

# Convert mean_ses_per_cell to sf for spatial operations
ses_grid_sf <- mean_ses_per_cell_clean %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4326)

# Spatial join with ecoregions
ses_grid_ecoregions <- st_join(ses_grid_sf, meow_ecos)

# Calculate mean SES by ecoregion (excluding areas outside ecoregions)
ecoregion_ses_stats <- ses_grid_ecoregions %>%
  st_drop_geometry() %>%
  filter(!is.na(ECOREGION)) %>%  # Exclude areas outside ecoregions
  group_by(ECOREGION) %>%
  summarize(
    PROVINCE = first(PROVINCE),
    REALM = first(REALM),
    mean_SES = mean(mean_SES, na.rm = TRUE),
    median_SES = median(mean_SES, na.rm = TRUE),
    sd_SES = sd(mean_SES, na.rm = TRUE),
    n_cells = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_SES))

# Print summary
cat("Top 10 Ecoregions by Mean SES:\n")
Top 10 Ecoregions by Mean SES:
Code
print(head(ecoregion_ses_stats, 10))
# A tibble: 10 × 7
   ECOREGION                   PROVINCE REALM mean_SES median_SES sd_SES n_cells
   <chr>                       <chr>    <chr>    <dbl>      <dbl>  <dbl>   <int>
 1 Cocos-Keeling/Christmas Is… Java Tr… Cent…     2.91       2.91     NA     272
 2 East Antarctic Dronning Ma… Contine… Sout…     2.77       2.77     NA      13
 3 Ross Sea                    Contine… Sout…     2.77       2.77     NA      36
 4 Chagos                      Central… West…     2.47       2.47     NA     207
 5 Phoenix/Tokelau/Northern C… Central… East…     2.30       2.30     NA     812
 6 East Antarctic Wilkes Land  Contine… Sout…     2.28       2.28     NA       2
 7 Hawaii                      Hawaii   East…     2.23       2.23     NA     924
 8 Line Islands                Central… East…     2.21       2.21     NA     487
 9 Ogasawara Islands           Tropica… Cent…     2.15       2.15     NA     404
10 Cocos Islands               Tropica… Trop…     2.11       2.11     NA     109
Code
cat("\nBottom 10 Ecoregions by Mean SES:\n")

Bottom 10 Ecoregions by Mean SES:
Code
print(tail(ecoregion_ses_stats, 10))
# A tibble: 10 × 7
   ECOREGION                   PROVINCE REALM mean_SES median_SES sd_SES n_cells
   <chr>                       <chr>    <chr>    <dbl>      <dbl>  <dbl>   <int>
 1 North and East Barents Sea  Arctic   Arct…    -7.09      -7.09     NA    1828
 2 Bouvet Island               Subanta… Sout…    -7.98      -7.98     NA       1
 3 Antarctic Peninsula         Scotia … Sout…    -8.20      -8.20     NA     258
 4 Baltic Sea                  Norther… Temp…    -8.68      -8.68     NA     228
 5 Chukchi Sea                 Arctic   Arct…    -8.74      -8.74     NA     390
 6 Beaufort Sea - continental… Arctic   Arct…    -8.78      -8.78     NA       2
 7 Weddell Sea                 Contine… Sout…    -8.87      -8.87     NA     174
 8 White Sea                   Arctic   Arct…    -9.91      -9.91     NA      62
 9 Amundsen/Bellingshausen Sea Contine… Sout…   -10.7      -10.7      NA     298
10 Peter the First Island      Subanta… Sout…   -10.7      -10.7      NA       5
Code
#======================
# Create SES ecoregion map
#======================

# Join SES stats back to spatial data
meow_ecos_ses_stats <- meow_ecos %>%
  left_join(ecoregion_ses_stats, by = "ECOREGION")

# Plot ecoregions SES map
ses_eco_map <- ggplot() +
  geom_sf(data = meow_ecos_ses_stats, aes(fill = mean_SES), color = NA) +
  geom_sf(data = land, fill = "darkgrey", color = "darkgrey", size = 0.1) +
  scale_fill_gradient2(
    name = "Mean SES",
    low = "#b2182b", mid = "#f0f0f0", high = "#2166ac", 
    midpoint = 0,
    na.value = "grey90",
    guide = guide_colorbar(barwidth = 20, barheight = 0.5,
                          title.position = "top", title.hjust = 0.5)
  ) +
  coord_sf(xlim = c(-180, 180), ylim = c(-90, 90), expand = FALSE) +
  theme_minimal() +
  labs(
    x = NULL, y = NULL
  ) +
  theme(
    legend.position = "bottom",
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Print and save the map
print(ses_eco_map)

Code
ggsave(here("Outputs", "ecoregion_SES_map_NT_clean.png"), 
       ses_eco_map, width = 12, height = 8, dpi = 300)

# Top 20 Ecoregions by Mean SES
top_ses_plot <- ggplot(head(ecoregion_ses_stats, 20),
                       aes(x = reorder(ECOREGION, mean_SES), y = mean_SES)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_errorbar(aes(ymin = mean_SES - sd_SES,
                    ymax = mean_SES + sd_SES),
                width = 0.2) +
  coord_flip() +
  labs(
    x = NULL,
    y = "Mean SES"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 8)
  )

# Bottom 20 Ecoregions by Mean SES
bottom_ses_plot <- ggplot(tail(ecoregion_ses_stats, 20),
                          aes(x = reorder(ECOREGION, mean_SES), y = mean_SES)) +
  geom_bar(stat = "identity", fill = "coral") +
  geom_errorbar(aes(ymin = mean_SES - sd_SES,
                    ymax = mean_SES + sd_SES),
                width = 0.2) +
  coord_flip() +
  labs(
    x = NULL,
    y = "Mean SES"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 8)
  )

# Print and save plots
print(top_ses_plot)

Code
ggsave(here("Outputs", "top_ecoregions_SES_NT_clean.png"), 
       top_ses_plot, width = 10, height = 8, dpi = 300)

print(bottom_ses_plot)

Code
ggsave(here("Outputs", "bottom_ecoregions_SES_NT_clean.png"), 
       bottom_ses_plot, width = 10, height = 8, dpi = 300)

# Save ecoregion statistics
write.csv(ecoregion_ses_stats,
          here::here("Outputs", "ecoregion_SES_stats_NT.csv"),
          row.names = FALSE)

Relate to IUCN status

Relate the percentage of species range overlapped by MPAs to the IUCN Red List status with betaregression analysis

Code
# Join IUCN and results data
results_IUCN <- left_join(IUCN, actual_coverage, by = c("species"))

# Add IUCN status categories
results_IUCN <- results_IUCN %>%
  mutate(IUCN_status = case_when(
    iucn_GE == 0 ~ "LC",
    iucn_GE == 1 ~ "NT",
    iucn_GE == 2 ~ "VU",
    iucn_GE == 3 ~ "EN",
    iucn_GE == 4 ~ "CR",
    TRUE ~ "Unknown"
  ))

# Filter out the "Unknown" category
results_IUCN_filtered <- results_IUCN %>%
  filter(IUCN_status != "Unknown")

#======================
# Beta Regression for No-take MPAs
#======================

# Transform percentage to (0,1) with boundary correction for no-take MPAs
results_IUCN_filtered <- results_IUCN_filtered %>%
  mutate(
    n_obs = n(),
    mpa_coverage_beta_nt = (NT/100 * (n_obs - 1) + 0.5) / n_obs
  )

# Fit beta regression for no-take MPAs
mpa_beta_nt <- betareg(mpa_coverage_beta_nt ~ iucn_GE, data = results_IUCN_filtered)
summary(mpa_beta_nt)

Call:
betareg(formula = mpa_coverage_beta_nt ~ iucn_GE, data = results_IUCN_filtered)

Quantile residuals:
    Min      1Q  Median      3Q     Max 
-1.1021 -1.0143 -0.0737  0.5368  4.2224 

Coefficients (mean model with logit link):
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -3.47352    0.05500 -63.151   <2e-16 ***
iucn_GE     -0.03493    0.02232  -1.565    0.118    

Phi coefficients (precision model with identity link):
      Estimate Std. Error z value Pr(>|z|)    
(phi)  14.0207     0.8357   16.78   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Type of estimator: ML (maximum likelihood)
Log-likelihood:  2848 on 3 Df
Pseudo R-squared: 0.004397
Number of iterations: 14 (BFGS) + 1 (Fisher scoring) 
Code
# Get marginal effects
mpa_margins_nt <- margins(mpa_beta_nt)
summary(mpa_margins_nt)
  factor     AME     SE       z      p   lower  upper
 iucn_GE -0.0010 0.0006 -1.5609 0.1186 -0.0022 0.0003
Code
# Get predicted values by threat category
mpa_pred_nt <- predict(mpa_beta_nt,
                       newdata = data.frame(iucn_GE = 0:4),
                       type = "response") * 100

print("Predicted No-take MPA coverage by IUCN category:")
[1] "Predicted No-take MPA coverage by IUCN category:"
Code
print(data.frame(
  Category = 0:4,
  IUCN = c("LC", "NT", "VU", "EN", "CR"),
  Predicted_Coverage = round(mpa_pred_nt, 2)
))
  Category IUCN Predicted_Coverage
1        0   LC               3.01
2        1   NT               2.91
3        2   VU               2.81
4        3   EN               2.72
5        4   CR               2.63
Code
# Average decrease per category
avg_decrease_nt <- mean(diff(mpa_pred_nt))
print(paste("Average decrease per threat category:", round(avg_decrease_nt, 2), "%"))
[1] "Average decrease per threat category: -0.1 %"
Code
#======================
# Ridge Plot with Points - No-take MPAs
#======================

# Define IUCN colors and order
iucn_colors <- c(
  "CR" = "#D81E05",
  "EN" = "#FC7F3F",
  "VU" = "#FEC748",
  "NT" = "#58AFFF",
  "LC" = "#38AB38"
)

iucn_order <- c("LC", "NT", "VU", "EN", "CR")

# Prepare no-take MPA data for plotting
notake_mpa_iucn <- results_IUCN_filtered %>%
  dplyr::select(species, IUCN_status, NT) %>%
  rename(protection = NT)

# Create the ridge plot with colored points
rdplot_points <- ggplot(notake_mpa_iucn, 
                        aes(x = factor(IUCN_status, levels = iucn_order), 
                            y = protection)) +
  geom_jitter(aes(color = IUCN_status),
              width = 0.1, 
              size = 1.5, 
              alpha = 0.6) +
  # Mean and SD statistics
  stat_summary(fun = mean, 
               geom = "point", 
               size = 3, 
               color = "black") +
  stat_summary(fun.data = function(x) {
    mean_val <- mean(x, na.rm = TRUE)
    sd_val <- sd(x, na.rm = TRUE)
    return(data.frame(y = mean_val, 
                      ymin = max(0, mean_val - sd_val),
                      ymax = mean_val + sd_val))
  },
  geom = "errorbar", 
  width = 0.1, 
  color = "black") +
  labs(x = "IUCN Red List threat status", 
       y = "(%) Range within no-take MPAs") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none",
    panel.grid.major.x = element_blank()
  ) +
  scale_x_discrete(limits = iucn_order) +
  scale_color_manual(values = iucn_colors) +
  scale_y_sqrt(limits = c(0, 50),
               breaks = seq(0, 50, 10),
               labels = c("0", "10", "20", "30", "40", ""))

print(rdplot_points)

Code
# Add white background to the plot
rdplot_points_white <- rdplot_points + 
  theme(
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA)
  )

# Save PNG with white background
ggsave(
  filename = here::here("Outputs", "notake_mpa_protection_points.png"),
  plot = rdplot_points_white,
  width = 6,
  height = 5,
  dpi = 300,
  bg = "white"
)

# Summary statistics for no-take MPAs
summary_stats_nt <- results_IUCN_filtered %>%
  group_by(IUCN_status) %>%
  summarise(
    count = n(),
    mean = mean(NT, na.rm = TRUE),
    median = median(NT, na.rm = TRUE),
    sd = sd(NT, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(factor(IUCN_status, levels = iucn_order))

print("Summary statistics for No-take MPAs:")
[1] "Summary statistics for No-take MPAs:"
Code
print(summary_stats_nt)
# A tibble: 5 × 5
  IUCN_status count  mean median    sd
  <chr>       <int> <dbl>  <dbl> <dbl>
1 LC            514  3.63  0.961  7.07
2 NT            118  2.03  0.878  3.21
3 VU            179  2.07  0.872  4.05
4 EN            121  1.58  0.834  2.59
5 CR             83  1.41  0.826  1.88

Grid of Fig. 1

Code
final_plot_combined = ggpubr::ggarrange(
  rdplot_points_white,                       
  nt_map_projected,
  stacked_bar_chart,                  
  continental_plot_NT,
  nrow = 4,
  ncol = 1, 
  labels = c("(A)", "(B)", "(C)", "(D)"),
  heights = c(0.8, 1.5, 0.4, 1.5), 
  font.label = list(size = 16, face = "bold"),
  vjust = 1,
  hjust = -0.1)

print(final_plot_combined)

Code
# Define the output path using `here`
output_path <- here::here("Outputs", "Fig_1_combined_points_3.png")

# Save the final plot using `ggsave`
ggsave(
  filename = output_path,    # File path for saving
  plot = final_plot_combined,         # The combined plot
  width = 8,                 # Width of the output (in inches)
  height = 17.35,               # Height of the output (in inches)
  dpi = 300, # Resolution (300 DPI for high quality)
  bg = "white", 
)

# Display the saved image in the Quarto document
knitr::include_graphics(here::here("Outputs", "Fig_1_combined_points_3.png"))